********************************************************************************
* This do-file:
*		1. Creates baseline interaction variables
* Last modified by: Jie Bai
* Date: 05/30/2017
********************************************************************************

* Session setup: start from an empty session, disable output paging and pick
* the graph scheme.
clear all
set more off
set scheme s1color

***Set directory
* Every path below is relative to the project folder. The cd is attempted
* under capture so a missing folder does not abort the run, but a failure is
* now reported instead of being swallowed silently: otherwise the first
* symptom is a "file not found" error on a later use command.
capture cd "/Users/Dropbox/Chinese food exports"
if _rc {
	display as text "note: could not cd to the project folder (rc=" _rc "); relative paths resolve from `c(pwd)'"
}

***locals
* NOTE(review): eventyear is not referenced anywhere in the visible part of
* this file; kept in case it is used elsewhere.
local eventyear=2009

*******************************************
* Generate baseline interaction variables *
*******************************************

***Intensity (firm)
* For every firm on the contaminated-firm list, computes the share of the
* firm's export value that comes from products on the contaminated-product
* list (intensity_afirms) and saves one row per firm. Firms whose share is
* exactly zero (including all firms not on the list) are dropped.
* NOTE(review): unlike the other baseline blocks in this file, no year
* restriction is applied here, so all years in the customs file enter the
* shares. Confirm whether a pre-2008 filter was intended.
use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta", clear
keep if import==0

* keepusing(party_id): only the match indicator is needed, so no other
* variable from the firm list is brought in (consistent with the product
* merge below and avoids name clashes with the variables generated here).
merge m:1 party_id using "Data/inspection_lists/contaminated_firmlist_customs.dta", keepusing(party_id)   //mark contaminated firms
tab _merge
gen affected_firms=.
replace affected_firms=1 if _merge==3
replace affected_firms=0 if _merge==1
drop if _merge==2   // listed firms that never appear in the export data
drop _merge

merge m:1 hs_id using "Data/inspection_lists/contaminated_productlist_customs.dta", keepusing(hs_id)   //mark contaminated products
tab _merge
gen affected_products=.
replace affected_products=1 if _merge==3
replace affected_products=0 if _merge==1
drop if _merge==2   // listed products that never appear in the export data
drop _merge

* One row per firm-product with its summed export value. Both flags are
* constant within a firm-product cell, so their means stay 0/1.
collapse (sum) value (mean) affected_firms affected_products, by(party_id hs_id)

* Firm-level export value of contaminated products. egen only fills the rows
* that satisfy the if condition; sorting on the new variable puts a filled row
* first (missing sorts last) so it can be copied to the firm's other rows.
bysort party_id: egen aproducts_value=total(value) if affected_products==1
bysort party_id (aproducts_value): replace aproducts_value=aproducts_value[1]

replace aproducts_value=0 if aproducts_value==.   // firm exports no contaminated product
replace aproducts_value=0 if affected_firms==0    // firm is not on the contaminated list

bysort party_id: egen total_value=total(value)

* Both totals are constant within firm, so the mean simply reduces to one row.
collapse (mean) aproducts_value total_value, by(party_id)

gen intensity_afirms=aproducts_value/total_value

keep party_id intensity_afirms
drop if intensity_afirms==0
save "Data/Coded data/Customs/customs_intensityofafirms_bl",replace

***Intensity (products)
* For every product on the contaminated-product list, computes the share of
* the product's export value (years up to 2007) that comes from firm-product
* pairs on the full contaminated list (intensity_aproducts) and saves one row
* per product. Products whose share is exactly zero are dropped.
use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta", clear
keep if import==0 & year<=2007

merge m:1 hs_id using "Data/inspection_lists/contaminated_productlist_customs.dta", keepusing(hs_id)   //mark contaminated products
tab _merge
drop if _merge==2   // listed products absent from the export data
gen affected_products = cond(_merge==3, 1, 0)
drop _merge

merge m:1 party_id hs_id using "Data/inspection_lists/contaminated_list_full.dta", keepusing(party_id hs_id)
drop if _merge==2   // listed firm-product pairs absent from the export data
gen affected_firm_products = cond(_merge==3, 1, 0)
drop _merge

* One row per firm-product; both flags are constant within a cell.
collapse (sum) value (mean) affected_firm_products affected_products, by(party_id hs_id)

* Product-level export value of contaminated firm-product pairs. Only flagged
* rows are filled by egen; ordering on the new variable puts a filled row
* first (missing sorts last) so its value can be spread over the product.
egen aproducts_value = total(value) if affected_firm_products==1, by(hs_id)
bysort hs_id (aproducts_value): replace aproducts_value = aproducts_value[1]

* Zero when the product has no contaminated pair or is not a listed product.
replace aproducts_value = 0 if aproducts_value==. | affected_products==0

egen total_value = total(value), by(hs_id)

* Both totals are constant within product; the mean just reduces to one row.
collapse (mean) aproducts_value total_value, by(hs_id)

gen intensity_aproducts = aproducts_value/total_value

drop if intensity_aproducts==0
keep hs_id intensity_aproducts
save "Data/Coded data/Customs/customs_intensityofaproducts_bl",replace

***location and ownership type
* Builds one row per firm with a single ownership code (type_id2) and
* firmprov, the first two characters of party_id.
use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta", clear

* Fill missing ownership codes with the firm's smallest observed code
* (missing values sort last, so the first row holds it).
bysort party_id (type_id2): replace type_id2 = type_id2[1] if type_id2==.
keep party_id type_id2
duplicates drop party_id type_id2, force

//isid party_id //some firms changed ownership type?
* n_other counts the firm's other rows, i.e. its additional distinct codes.
* Only firms with exactly two distinct codes are reconciled below.
bysort party_id: gen n_other = _N - 1
egen type_hi = max(type_id2), by(party_id)
egen type_lo = min(type_id2), by(party_id)
replace type_id2 = 1 if n_other==1 & type_lo==1 //replace these as state-owned
* NOTE(review): this runs after the line above, so a firm holding code 1
* together with code 3 or 4 ends up as 3, not 1 - confirm that is intended.
replace type_id2 = 3 if n_other==1 & inlist(type_hi, 3, 4)
keep party_id type_id2
duplicates drop party_id type_id2, force
* Stops with an error if any firm still carries more than one code.
isid party_id
sort party_id
gen firmprov = substr(party_id, 1, 2)

save "Data/Coded data/Customs/customs_firmchar_bl",replace


***Industries baseline average export size

//HS 4 level
* One row per 4-character hs_id prefix with hs4avg_sales_bl: the median,
* across firms, of each firm's total pre-2008 export value in that prefix.
* Despite "avg" in the name, the statistic is a median.
use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta", clear

keep if import==0 & year<2008
replace value = value/1e6  //change to million dollars

gen hs4digit = substr(hs_id, 1, 4)

* Firm totals within each prefix, then the median of those totals.
collapse (sum) value, by(party_id hs4digit)
egen hs4avg_sales_bl = median(value), by(hs4digit)

* The median is constant within prefix, so any single row per prefix will do.
keep hs4digit hs4avg_sales_bl
bysort hs4digit: keep if _n==1

save "Data/Coded data/Customs/customs_hs4exports_bl",replace

//HS 2 level
* One row per 2-character hs_id prefix with hs2avg_sales_bl: the median,
* across firms, of each firm's total pre-2008 export value in that prefix.
* Despite "avg" in the name, the statistic is a median.
use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta", clear

keep if import==0 & year<2008
replace value = value/1e6  //change to million dollars

gen hs2digit = substr(hs_id, 1, 2)

* Firm totals within each prefix, then the median of those totals.
collapse (sum) value, by(party_id hs2digit)
egen hs2avg_sales_bl = median(value), by(hs2digit)

* The median is constant within prefix, so any single row per prefix will do.
keep hs2digit hs2avg_sales_bl
bysort hs2digit: keep if _n==1

save "Data/Coded data/Customs/customs_hs2exports_bl",replace


***firms' baseline experience of exporting and export size
* One row per firm, from export records before 2008:
*   firm_sales_bl   total export value (million dollars)
*   firm_sourceloc  most frequent city_id across the firm's records
*   firm_exp_bl     number of distinct years with at least one record
use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta", clear

keep if import==0 & year<2008
replace value = value/1e6  //change to million dollars

egen firm_sales_bl = total(value), by(party_id)
egen firm_sourceloc = mode(city_id), by(party_id)

* Reduce to one row per firm-year so that _N counts years, then to one row
* per firm. The kept variables are constant within firm.
bysort party_id year: keep if _n==1
by party_id: gen firm_exp_bl = _N
by party_id: keep if _n==1

keep party_id firm_exp_bl firm_sales_bl firm_sourceloc
tab firm_exp_bl, m  //27% one-year firms
sum firm_sales_bl, d
sort party_id

save "Data/Coded data/Customs/customs_firmexports_bl",replace


***firm's baseline experience of exporting a particular product
* One row per firm-product (party_id hs_id), from export records before 2008:
*   firmprod_sales_bl   total export value (million dollars)
*   firmprod_sourceloc  most frequent city_id across the pair's records
*   firmprod_exp_bl     number of distinct years with at least one record
use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta", clear

keep if import==0 & year<2008
replace value = value/1e6  //change to million dollars

egen firmprod_sales_bl = total(value), by(party_id hs_id)
egen firmprod_sourceloc = mode(city_id), by(party_id hs_id)

* Reduce to one row per firm-product-year so that _N counts years, then to
* one row per firm-product. The kept variables are constant within the pair.
bysort party_id hs_id year: keep if _n==1
by party_id hs_id: gen firmprod_exp_bl = _N
by party_id hs_id: keep if _n==1

keep party_id hs_id firmprod_exp_bl firmprod_sales_bl firmprod_sourceloc
tab firmprod_exp_bl, m
sum firmprod_sales_bl, d
sort party_id hs_id

save "Data/Coded data/Customs/customs_firmprodexports_bl",replace



***Google Trends data
* Three-level news index per origin_id, saved to gtrends.dta:
*   2  gnewsindex_orig at or above the cutoff
*   1  gnewsindex_orig below the cutoff
*   0  gnewsindex_orig missing
use "Data/google_trends/gtrends_orig.dta", clear

* The cutoff is a 7-digit decimal literal. Stata evaluates literals in double
* precision, so comparing it directly with a value stored as float can
* misclassify an observation that sits exactly on the cutoff. Both sides are
* therefore compared at float precision.
* NOTE(review): assumes the cutoff was read off displayed (7-digit) values of
* gnewsindex_orig - confirm.
local cutoff 1.617737
tempvar news
gen float `news' = gnewsindex_orig

gen gnewsindex=0
replace gnewsindex=1 if `news'<float(`cutoff')
* missing() also keeps extended missing values (.a, .b, ...) out of the top
* bin; they stay at 0 like ordinary missing values.
replace gnewsindex=2 if `news'>=float(`cutoff') & !missing(`news')

keep origin_id gnewsindex

save "Data/google_trends/gtrends.dta", replace


* Four-level news index per origin_id, saved to gtrends1.dta:
*   3  gnewsindex_orig at or above the high cutoff
*   2  between the middle and high cutoffs
*   1  between the low and middle cutoffs
*   0  below the low cutoff, or missing
use "Data/google_trends/gtrends_orig.dta", clear

* The cutoffs are 7-digit decimal literals. Stata evaluates literals in double
* precision, so comparing them directly with a value stored as float can
* misclassify an observation that sits exactly on a cutoff. Both sides are
* therefore compared at float precision.
* NOTE(review): assumes the cutoffs were read off displayed (7-digit) values
* of gnewsindex_orig - confirm.
local cut_low  .5057803
local cut_mid  1.15798
local cut_high 1.617737
tempvar news
gen float `news' = gnewsindex_orig

gen gnewsindex=0
replace gnewsindex=1 if `news'>=float(`cut_low') & `news'<float(`cut_mid')
replace gnewsindex=2 if `news'>=float(`cut_mid') & `news'<float(`cut_high')
* missing() also keeps extended missing values (.a, .b, ...) out of the top
* bin; they stay at 0 like ordinary missing values.
replace gnewsindex=3 if `news'>=float(`cut_high') & !missing(`news')

keep origin_id gnewsindex

save "Data/google_trends/gtrends1.dta", replace


